{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "34ef9983-7fe2-423f-a021-e8442b1648b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import findspark\n",
    "from pyspark.sql import SparkSession\n",
    "import pyspark.sql.functions as F\n",
    "import pyspark.sql.types as T\n",
    "import pandas as pd\n",
    "from functools import reduce\n",
    "\n",
    "findspark.init()\n",
    "\n",
    "spark = SparkSession.builder.master(\"local[*]\").appName(\"customize transformer\").config(\"spark.driver.memory\",\"8g\").getOrCreate()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "7e8908ea-a224-46f1-b488-b6c76dae8c9a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---+---+-----+----+----+\n",
      "|one|two|three|four|five|\n",
      "+---+---+-----+----+----+\n",
      "|  1|  2|    4|   1|   4|\n",
      "|  3|  6|    5|   4|   5|\n",
      "|  9|  4| NULL|   9| -99|\n",
      "| 11| 17| NULL|   3| -99|\n",
      "+---+---+-----+----+----+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import pyspark.sql.functions as F\n",
    "from pyspark.sql import Column,DataFrame\n",
    "from typing import Optional,Union  # Union: value may be one of several types; Optional: a type or None\n",
    "\n",
    "# Small demo frame with nulls in column \"three\" to exercise the filler.\n",
    "test_df = spark.createDataFrame(\n",
    "    [[1,2,4,1],[3,6,5,4],[9,4,None,9],[11,17,None,3]],\n",
    "    [\"one\",\"two\",\"three\",\"four\"]\n",
    ")\n",
    "\n",
    "def scalarNAFillerFunction(df:DataFrame,inputCol:Union[Column,str],outputCol:str,filler:float=0.0):\n",
    "    \"\"\"Copy `inputCol` into `outputCol`, replacing nulls with `filler`.\n",
    "\n",
    "    Plain-function version of the Transformer built below; `inputCol` may be\n",
    "    either a column name or a Column expression.\n",
    "    \"\"\"\n",
    "    if isinstance(inputCol,str):inputCol = F.col(inputCol)\n",
    "    return df.withColumn(outputCol,inputCol).fillna(filler,subset=outputCol)\n",
    "\n",
    "scalarNAFillerFunction(test_df,\"three\",\"five\",-99).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "6a3e6dbc-1ed7-47f2-a7bf-5b6829246a1f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "value we want to repalce our null values with\n"
     ]
    }
   ],
   "source": [
    "from pyspark.ml.param import Param,Params,TypeConverters\n",
    "\n",
    "filler = Param(\n",
    "    Params._dummy(),\n",
    "    \"filler\",\n",
    "    \"value we want to repalce our null values with\",\n",
    "    typeConverter = TypeConverters.toFloat\n",
    ")\n",
    "print(filler.doc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "826f525c-6705-4662-878a-1a9c480cf0c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.ml import Transformer\n",
    "from pyspark.ml.param.shared import HasInputCol,HasOutputCol\n",
    "from pyspark import keyword_only\n",
    "\n",
    "class ScalarNAFiller(Transformer,HasInputCol,HasOutputCol):\n",
    "    \"\"\"Transformer that copies inputCol into outputCol, replacing nulls with `filler`.\n",
    "\n",
    "    ML-Pipeline-compatible version of scalarNAFillerFunction(); the output\n",
    "    column is cast to double before filling so it matches the float filler.\n",
    "    \"\"\"\n",
    "    \n",
    "    # Class-level Param; Params._dummy() is the conventional placeholder parent\n",
    "    # (the real parent is bound per-instance by pyspark.ml).\n",
    "    filler = Param(\n",
    "    Params._dummy(),\n",
    "    \"filler\",\n",
    "    \"value we want to repalce our null values with\",\n",
    "    typeConverter = TypeConverters.toFloat\n",
    "    )\n",
    "\n",
    "    @keyword_only\n",
    "    def __init__(self,inputCol=None,outputCol=None,filler=None):\n",
    "        # @keyword_only stores the passed kwargs in self._input_kwargs.\n",
    "        super().__init__()\n",
    "        self._setDefault(filler=None)\n",
    "        kwargs = self._input_kwargs\n",
    "        self.setParams(**kwargs)\n",
    "\n",
    "    @keyword_only\n",
    "    def setParams(self,*,inputCol=None,outputCol=None,filler=None):\n",
    "        # Set all caller-supplied Params in one shot.\n",
    "        kwargs = self._input_kwargs\n",
    "        return self._set(**kwargs)\n",
    "\n",
    "    def setFiller(self,new_filler):\n",
    "        return self.setParams(filler=new_filler)\n",
    "\n",
    "    def getFiller(self):\n",
    "        return self.getOrDefault(self.filler)\n",
    "        \n",
    "    def setInputCol(self,new_inputCol):\n",
    "        return self.setParams(inputCol=new_inputCol)\n",
    "\n",
    "    def getInputCol(self):\n",
    "        return self.getOrDefault(self.inputCol)\n",
    "\n",
    "    def setOutputCol(self,new_outputCol):\n",
    "        return self.setParams(outputCol=new_outputCol)\n",
    "\n",
    "    def getOutputCol(self):\n",
    "        return self.getOrDefault(self.outputCol)\n",
    "\n",
    "    def _transform(self,dataset):\n",
    "        \"\"\"Called by Transformer.transform(); inputCol is mandatory.\"\"\"\n",
    "        if not self.isSet(\"inputCol\"):\n",
    "            raise ValueError(\"No input column set for the ScalarNAFiller transformer\")\n",
    "        input_column = dataset[self.getInputCol()]\n",
    "        output_column = self.getOutputCol()\n",
    "        na_filler = self.getFiller()\n",
    "        # Cast to double so fillna() applies to the (float) filler value.\n",
    "        return dataset.withColumn(output_column,input_column.cast(\"double\")).fillna(na_filler,output_column)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "fbad3729-68b9-4a24-ba5b-987e97a6fffd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---+---+-----+----+----+\n",
      "|one|two|three|four|five|\n",
      "+---+---+-----+----+----+\n",
      "|  1|  2|    4|   1| 4.0|\n",
      "|  3|  6|    5|   4| 5.0|\n",
      "|  9|  4| NULL|   9|-1.0|\n",
      "| 11| 17| NULL|   3|-1.0|\n",
      "+---+---+-----+----+----+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "test_ScalarNAFiller = ScalarNAFiller(inputCol=\"three\",outputCol=\"five\",filler=-1)\n",
    "test_ScalarNAFiller.transform(test_df).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "611d4302-2c27-473e-b042-04c5f12f3525",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ?test_ScalarNAFiller.transform"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "dcbf371e-f15a-4cee-a737-f2ea6a006b1e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.ml.util import DefaultParamsReadable,DefaultParamsWritable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "2ace1955-fe02-40f4-929d-2995bd1660b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "class _ExtremeValueCapperParams(HasInputCol, HasOutputCol, DefaultParamsReadable,DefaultParamsWritable):\n",
    "    \"\"\"Shared Param mixin for ExtremeValueCapper and its fitted model.\n",
    "\n",
    "    Provides the `boundary` Param alongside inputCol/outputCol, plus default\n",
    "    save/load support via DefaultParamsReadable/DefaultParamsWritable.\n",
    "    \"\"\"\n",
    "\n",
    "    boundary = Param(\n",
    "        Params._dummy(),\n",
    "        \"boundary\",\n",
    "        \"Multiple of standard deviation for the cap  and floor.default=0.0\",\n",
    "        TypeConverters.toFloat\n",
    "    )\n",
    "\n",
    "    def __init__(self,*args):\n",
    "        # Forward *args so the mixin cooperates with Estimator/Model __init__.\n",
    "        super().__init__(*args)\n",
    "        self._setDefault(boundary=0.0)\n",
    "\n",
    "    def getBoundary(self):\n",
    "        return self.getOrDefault(self.boundary)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "9e9ba6ec-c3ff-444d-ad2a-fe7b474d0fcc",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.ml import Model\n",
    "\n",
    "class ExtremeValueCapperModel(Model,_ExtremeValueCapperParams):\n",
    "    \"\"\"Fitted companion of ExtremeValueCapper: clips inputCol to [floor, cap].\n",
    "\n",
    "    cap/floor are computed by ExtremeValueCapper._fit() as mean +/- boundary*stddev.\n",
    "    \"\"\"\n",
    "\n",
    "    cap = Param(\n",
    "        Params._dummy(),\n",
    "        \"cap\",\n",
    "        \"Upper bound ofr the values inputCol can take\",\n",
    "        TypeConverters.toFloat\n",
    "    )\n",
    "\n",
    "    floor = Param(\n",
    "        Params._dummy(),\n",
    "        \"floor\",\n",
    "        \"Lower bound ofr the values inputCol can take\",\n",
    "        TypeConverters.toFloat\n",
    "    )\n",
    "    \n",
    "\n",
    "    @keyword_only\n",
    "    def __init__(self,inputCol=None,outputCol=None,cap=None,floor=None):\n",
    "        # @keyword_only stores the passed kwargs in self._input_kwargs.\n",
    "        super().__init__()\n",
    "        kwargs = self._input_kwargs\n",
    "        self.setParams(**kwargs)\n",
    "\n",
    "    @keyword_only\n",
    "    def setParams(self, inputCol=None, outputCol=None, cap=None, floor=None):\n",
    "        kwargs = self._input_kwargs\n",
    "        return self._set(**kwargs)\n",
    "        \n",
    "\n",
    "    def _transform(self,dataset):\n",
    "        \"\"\"Clamp inputCol between floor and cap into outputCol; nulls pass through.\"\"\"\n",
    "        if not self.isSet(\"inputCol\"):\n",
    "            raise ValueError(\"No input column for the ExtremeValueCapperModel transformer.\")\n",
    "            \n",
    "        input_column = dataset[self.getInputCol()]\n",
    "        output_column = self.getOutputCol()\n",
    "        cap_value = self.getOrDefault(\"cap\")\n",
    "        floor_value = self.getOrDefault(\"floor\")\n",
    "\n",
    "        # Nulls fail both when() conditions and fall through otherwise() unchanged.\n",
    "        return dataset.withColumn(output_column,\n",
    "                                  F.when(input_column>cap_value,cap_value)\n",
    "                                  .when(input_column<floor_value,floor_value)\n",
    "                                  .otherwise(input_column)\n",
    "                                 )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "91118d21-eec6-45b2-a86b-39a3550c7747",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.ml import Estimator\n",
    "\n",
    "class ExtremeValueCapper(Estimator,_ExtremeValueCapperParams):\n",
    "    \"\"\"Estimator that learns mean +/- boundary*stddev bounds for inputCol.\n",
    "\n",
    "    fit() returns an ExtremeValueCapperModel carrying the computed cap/floor.\n",
    "    \"\"\"\n",
    "\n",
    "    @keyword_only\n",
    "    def __init__(self,inputCol=None,outputCol=None,boundary=None):\n",
    "        super().__init__()\n",
    "        # self._setDefault(boundary=0.0)  # redundant: already set in _ExtremeValueCapperParams.__init__\n",
    "        kwargs = self._input_kwargs\n",
    "        self.setParams(**kwargs)\n",
    "\n",
    "    @keyword_only\n",
    "    def setParams(self, inputCol=None, outputCol=None, boundary=None):\n",
    "        kwargs = self._input_kwargs\n",
    "        return self._set(**kwargs)\n",
    "\n",
    "    def _fit(self,dataset):\n",
    "        \"\"\"Compute cap/floor from the data and return the fitted model.\"\"\"\n",
    "        input_column = self.getInputCol()\n",
    "        output_column = self.getOutputCol()\n",
    "        boundary = self.getBoundary()\n",
    "\n",
    "        # Single aggregation pass; head() yields one Row of (mean, stddev).\n",
    "        avg,stddev = dataset.agg(F.mean(input_column),F.stddev(input_column)).head()\n",
    "\n",
    "        cap_value = avg + boundary*stddev\n",
    "        floor_value = avg - boundary*stddev\n",
    "\n",
    "        return ExtremeValueCapperModel(inputCol=input_column,outputCol=output_column,cap=cap_value,floor=floor_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "9dbdf0db-efe8-4027-a4f4-708e08da8c84",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---+---+-----+----+------------------+\n",
      "|one|two|three|four|              five|\n",
      "+---+---+-----+----+------------------+\n",
      "|  1|  2|    4|   1|1.2390477143047667|\n",
      "|  3|  6|    5|   4|               3.0|\n",
      "|  9|  4| NULL|   9|               9.0|\n",
      "| 11| 17| NULL|   3|10.760952285695232|\n",
      "+---+---+-----+----+------------------+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "test_EVC = ExtremeValueCapper(inputCol=\"one\",outputCol=\"five\",boundary=1.0)\n",
    "test_EVC.fit(test_df).transform(test_df).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "c694d333-e84e-4c9b-99d6-afe48de2dc23",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.ml.param.shared import HasInputCols,HasOutputCols\n",
    "\n",
    "class ScalarNAFiller(\n",
    "    Transformer,\n",
    "    HasInputCol,\n",
    "    HasOutputCol,\n",
    "    HasInputCols,\n",
    "    HasOutputCols,\n",
    "    DefaultParamsReadable,\n",
    "    DefaultParamsWritable\n",
    "):\n",
    "    \"\"\"Null-filling transformer supporting single- or multi-column use.\n",
    "\n",
    "    Exactly one of inputCol/inputCols must be set; in the multi-column case\n",
    "    outputCols must have the same length as inputCols. Serializable via\n",
    "    DefaultParamsReadable/DefaultParamsWritable.\n",
    "    \"\"\"\n",
    "\n",
    "    filler = Param(\n",
    "    Params._dummy(),\n",
    "    \"filler\",\n",
    "    \"value we want to repalce our null values with\",\n",
    "    typeConverter = TypeConverters.toFloat\n",
    "    )\n",
    "\n",
    "    @keyword_only\n",
    "    def __init__(self,inputCol=None,outputCol=None,inputCols=None,outputCols=None,filler=None):\n",
    "        # @keyword_only stores the passed kwargs in self._input_kwargs.\n",
    "        super().__init__()\n",
    "        self._setDefault(filler=None)\n",
    "        kwargs = self._input_kwargs\n",
    "        self.setParams(**kwargs)\n",
    "\n",
    "    @keyword_only\n",
    "    def setParams(self,*,inputCol=None,outputCol=None,inputCols=None,outputCols=None,filler=None):\n",
    "        kwargs = self._input_kwargs\n",
    "        return self._set(**kwargs)\n",
    "\n",
    "    def getFiller(self):\n",
    "        return self.getOrDefault(self.filler)\n",
    "\n",
    "    def checkParams(self):\n",
    "        \"\"\"Validate the single-vs-multi column Param combination.\"\"\"\n",
    "        if self.isSet(\"inputCol\")  and self.isSet(\"inputCols\"):\n",
    "            raise ValueError(\"Only one of inputCol or inputCols must be set.\")\n",
    "        if not (self.isSet(\"inputCol\")  or self.isSet(\"inputCols\")):\n",
    "            raise ValueError(\"One of inputCol or inputCols must be set.\")\n",
    "        if self.isSet(\"inputCols\"):\n",
    "            if len(self.getInputCols()) != len(self.getOutputCols()):\n",
    "                raise ValueError(\"The length of inputCols does not match the length of outputCols\")\n",
    "                \n",
    "    def _transform(self,dataset):\n",
    "        self.checkParams()\n",
    "        \n",
    "        # Normalize both cases to lists so one code path handles them.\n",
    "        input_columns = [self.getInputCol()] if self.isSet(\"inputCol\") else self.getInputCols()\n",
    "        # BUGFIX: was self.getOutCol(), a method that does not exist, so any use\n",
    "        # of the single-column outputCol mode raised AttributeError.\n",
    "        output_columns = [self.getOutputCol()] if self.isSet(\"outputCol\") else self.getOutputCols()\n",
    "\n",
    "        answer = dataset\n",
    "\n",
    "        # Only copy columns when renaming; filling in place needs no copy.\n",
    "        if input_columns != output_columns:\n",
    "            for in_col,out_col in zip(input_columns,output_columns):\n",
    "                answer = answer.withColumn(out_col,F.col(in_col))\n",
    "                \n",
    "        na_filler = self.getFiller()\n",
    "        return answer.fillna(na_filler,output_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "e313e3f4-7ad3-47b4-9cdc-2ac6bc213483",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----+\n",
      "|five|\n",
      "+----+\n",
      "|   1|\n",
      "|NULL|\n",
      "|   2|\n",
      "|   0|\n",
      "+----+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "temp_df = spark.createDataFrame([[1],[None],[2],[0]],\"five int\")\n",
    "temp_df.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "cfa2d9d6-433e-4b64-920d-e9ea76ac0e81",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-----------+---+---+-----+----+----+\n",
      "|         id|one|two|three|four|five|\n",
      "+-----------+---+---+-----+----+----+\n",
      "| 8589934592|  1|  2|    4|   1|   1|\n",
      "|25769803776|  3|  6|    5|   4|NULL|\n",
      "|42949672960|  9|  4| NULL|   9|   2|\n",
      "|60129542144| 11| 17| NULL|   3|   0|\n",
      "+-----------+---+---+-----+----+----+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "test_df_new =test_df.withColumn(\"id\", F.monotonically_increasing_id())\n",
    "temp_df = temp_df.withColumn(\"id\", F.monotonically_increasing_id())\n",
    "test_df_new = test_df_new.join(temp_df,on=\"id\")\n",
    "test_df_new.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "debdb441-97ca-41bb-bcde-366f9f3a01ab",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-----------+---+---+-----+----+----+---+-----+-----+\n",
      "|         id|one|two|three|four|five|six|seven|eight|\n",
      "+-----------+---+---+-----+----+----+---+-----+-----+\n",
      "| 8589934592|  1|  2|    4|   1|   1|  4|    1|    1|\n",
      "|25769803776|  3|  6|    5|   4|NULL|  5|    4|   -1|\n",
      "|42949672960|  9|  4| NULL|   9|   2| -1|    9|    2|\n",
      "|60129542144| 11| 17| NULL|   3|   0| -1|    3|    0|\n",
      "+-----------+---+---+-----+----+----+---+-----+-----+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "ScalarNAFiller(inputCols=[\"three\",\"four\",\"five\"],outputCols=[\"six\",\"seven\",\"eight\"],filler=-1).transform(test_df_new).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "e7650ea8-5b22-4565-8f55-a36f131fdfdb",
   "metadata": {},
   "outputs": [],
   "source": [
    "file_path = r\"D:\\bigdata\\spark_ practice\\DataAnalysisWithPythonAndPySpark-Data-trunk\\recipes\\epi_r.csv\"\n",
    "food = spark.read.csv(file_path,inferSchema=True,header=True)\n",
    "\n",
    "def sanitize_column_name(name):\n",
    "    answer = name\n",
    "    for i,j in ((\" \",\"_\"),(\"-\",\"_\"),(\"&\",\"and\"),(\"/\",\"_\")):\n",
    "        answer = answer.replace(i,j)\n",
    "    return \"\".join([char for char in answer if char.isalpha() or char.isdigit() or char==\"_\"])\n",
    "\n",
    "food = food.toDF(*[sanitize_column_name(col) for col in food.columns])\n",
    "# BUGFIX: parenthesize each OR — `&` binds tighter than `|` in Python, so the\n",
    "# original parsed as cakeweek_ok | (cakeweek_null & wasteless_ok) | wasteless_null\n",
    "# instead of the intended (cakeweek ok-or-null) AND (wasteless ok-or-null).\n",
    "food = food.where((F.col(\"cakeweek\").isin([0.0,1.0])|F.col(\"cakeweek\").isNull())\n",
    "                 & (F.col(\"wasteless\").isin([0.0,1.0])|F.col(\"wasteless\").isNull()))\n",
    "\n",
    "IDENTIFIERS = [\"title\"]\n",
    "CONTINUOUS_COLUMNS = [\"rating\",\"calories\",\"protein\",\"fat\",\"sodium\"]\n",
    "TARGET_COLUMN = [\"dessert\"]\n",
    "BINARY_COLUMNS = [x for x in food.columns if x not in IDENTIFIERS and x not in  CONTINUOUS_COLUMNS and x not in TARGET_COLUMN]\n",
    "\n",
    "food = food.dropna(how=\"all\").dropna(subset=TARGET_COLUMN)\n",
    "\n",
    "\n",
    "from typing import Optional\n",
    "@F.udf(T.BooleanType())\n",
    "def is_a_number(value:Optional[str])->bool:\n",
    "    if not value:\n",
    "        return True\n",
    "    elif value.replace('.','').isnumeric():\n",
    "        return True\n",
    "    else:\n",
    "        return False\n",
    "\n",
    "food = food.where(\n",
    "    is_a_number(F.col(\"rating\"))&is_a_number(F.col(\"calories\"))\n",
    ").withColumns(\n",
    "    {col:F.col(col).cast(\"double\") for col in [\"rating\",\"calories\"]}\n",
    ")\n",
    "\n",
    "maximum = {\n",
    "    \"calories\":3184.0,\n",
    "    \"protein\":173.0,\n",
    "    \"fat\":207.0,\n",
    "    \"sodium\":5649.0\n",
    "}\n",
    "for k,v in maximum.items():\n",
    "    food = food.withColumn(k,F.when(F.isnull(F.col(k)),F.col(k)).otherwise(F.least(F.col(k),F.lit(v))))\n",
    "\n",
    "inst_sum_of_binary_columns = list(map(lambda x:F.sum(F.col(x)).alias(x),BINARY_COLUMNS))\n",
    "sum_of_binary_columns = food.select(inst_sum_of_binary_columns)\n",
    "sum_of_binary_columns = sum_of_binary_columns.head().asDict()\n",
    "num_rows = food.count()\n",
    "too_rare_features = [k for k,v in sum_of_binary_columns.items() if v<10 or v>num_rows-10]\n",
    "BINARY_COLUMNS = list(set(BINARY_COLUMNS) - set(too_rare_features))\n",
    "\n",
    "food = (\n",
    "    food.withColumn(\"protein_ratio\",F.col(\"protein\")*4/F.col(\"calories\"))\n",
    "    .withColumn(\"fat_ratio\",F.col(\"fat\")*9/F.col(\"calories\"))\n",
    ")\n",
    "ratio_set=[\"protein_ratio\",\"fat_ratio\"]\n",
    "food = food.fillna(0.0,subset=ratio_set)\n",
    "CONTINUOUS_COLUMNS.extend(ratio_set)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "dceb8e33-2045-4ae4-8ba5-03eee5c940f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "scalar_na_filler = ScalarNAFiller(inputCols=BINARY_COLUMNS, outputCols=BINARY_COLUMNS, filler=0.0)\n",
    "extreme_value_capper_cal = ExtremeValueCapper(inputCol=\"calories\",outputCol=\"calories\",boundary=2.0)\n",
    "extreme_value_capper_pro = ExtremeValueCapper(inputCol=\"protein\",outputCol=\"protein\",boundary=2.0)\n",
    "extreme_value_capper_fat = ExtremeValueCapper(inputCol=\"fat\",outputCol=\"fat\",boundary=2.0)\n",
    "extreme_value_capper_sod = ExtremeValueCapper(inputCol=\"sodium\",outputCol=\"sodium\",boundary=2.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "f3247bdc-5f1c-4ec5-b2f3-658c69702704",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Area under ROC = 0.9935072044847375\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcoAAAHACAYAAAAiByi6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA8D0lEQVR4nO3de1hVVf7H8c8BuWmCmgFeKK2mzDQtTQbNKZWJ0iynLEpHzcxu1s+im3bRzBKnUXNmcrRMu5tmaZmapiiZaTl5mZzJbMxrJqhZgKjczv79sQMiAeFwzlnn8n49z3lib/aGLzs8H9baa6/lsCzLEgAAqFSI6QIAAPBlBCUAANUgKAEAqAZBCQBANQhKAACqQVACAFANghIAgGoQlAAAVKOe6QK8zel06ocfflDDhg3lcDhMlwMAMMSyLOXl5al58+YKCam63Rh0QfnDDz8oISHBdBkAAB+xb98+tWzZssrPB11QNmzYUJJ9YaKjow1XAwAwJTc3VwkJCWW5UJWgC8rS7tbo6GiCEgBwyttwDOYBAKAaBCUAANUgKAEAqAZBCQBANQhKAACqQVACAFANghIAgGoQlAAAVIOgBACgGgQlAADVMBqUa9asUd++fdW8eXM5HA69//77pzwnMzNTl1xyiSIiInTuuefq1Vdf9XidAIDgZTQo8/Pz1aFDB02bNq1Gx+/atUt9+vRRjx49tGXLFt1///26/fbbtXz5cg9XCgAIVkYnRb/66qt19dVX1/j4GTNmqHXr1po8ebIk6YILLtDatWv1/PPPKyUlxVNlAgCCmF+tHrJ+/XolJydX2JeSkqL777+/ynMKCgpUUFBQtp2bm+up8vzK4aMF+mT7IZ0oLinbZ1nSobwCnSiy9xUUO5WVc0KWLB0rLNGhvIKqvhwAeN3fbr5Y58dXv0SWO/hVUGZlZSkuLq7Cvri4OOXm5ur48eOKioo66Zz09HSNGzfOWyW6TYnT0pe7j+hYUYkKipzad+SYtuz7WceLSk469lhhsQ7WMMRyjxfpp2NFKnFa7i4ZALzqRCXvh57gV0HpitGjRystLa1su3ShTl9gWZYKip2S7Jbcxj0/6fufjikr94Q+2/Gjdh3O9+j3b98iRs1iIivsaxBRT01PCy9bny22YYSiwkPlkEPxMREKDw31aE0AUCnLKWVlS82ale1qfUYDr3xrvwrK+Ph4ZWdnV9iXnZ2t6OjoSluTkhQREaGIiAhvlHdKxSVOvb/lh7JA/CYr75TdmRc0i1a9EIfiYyJ1QbNotWx88s/pkNS8UZTCQk89Nis0xKFmMZGKDAtVkwbhrv4oAOA9Tqd0553S/PnSypVS585e/fZ+FZRJSUlaunRphX0rVqxQUlKSoYpqbt+RY3pw/r+1YdeRKo+5sHm0zjnjNDVrFKnTwuvpjxfGqU18tBerBAAf43RKd9whzZolhYRIO3YEV1AePXpUO3bsKNvetWuXtmzZoiZNmujMM8/U6NGjtX//fr3++uuSpLvuuksvvPCCHnnkEd12221atWqV3nnnHS1ZssTUj1CtfUeO6cOvftD7m/fr2+yjkqQQh5R6aYLaNotWQpP66pjQSPVCQ1QvxKHIMLo1AaCM0ykNHy7Nnm2H5BtvSDff7PUyjAbll19+qR49epRtl95LHDJkiF599VUdOHBAe/fuLft869attWTJEj3wwAP629/+ppYtW+rll1/2yUdD3t+8X/fP21JhX/sWMXr6ugt18ZmNzRQFAP6ipES6/Xbp1VftkHzrLSMhKUkOy7KCavhjbm6uYmJilJOTo+hoz3RrrvvusAbM/KJse/x1F6rXBXFq3qjy+6gAgF8pKZGGDZNee00KDbVDMjXV7d+mpnngV/co/YFlWRq/eJsku5v14wf+oHNjPf+cDwAEjKIiaf9+OyTnzJFuusloOQSlm73x+R5tO5Cr0BCH1jzSQy1oRQJA7URGSh98IH3+udSzp+lqWD3E
nQ7kHFf60m8kSfdccQ4hCQA1VVwszZtnTxEmSfXr+0RISgSlW83dsE/Hi0rUMaGR7k8+z3Q5AOAfioulwYPtwTpjxpiu5iR0vbrJN1m5emG1/ajLrV1bKTTEYbgiAPADxcXSn/9stybr1ZM6dTJd0UkISjdZ/c0hlTgttW8Ro2s7NDddDgD4vuJiaeBA6Z13pLAwe+ad664zXdVJCEo32bjHnnHn6vbxCqE1CQDVKyqyQ3L+fDsk33tP6tvXdFWV4h6lG/x4tEAZ3xyUJHU/9wzD1QCAj7Os8pAMD5cWLPDZkJQISrdYu+OwLEtqE99Q7VvGmC4HAHybwyGlpNiPgSxYIF1zjemKqkXXqxt8sv2QJOmK82MNVwIAfmLYMOmqq6QWLUxXckq0KN1g3Xc/SpL+cF5Tw5UAgI8qLJQefFA6eLB8nx+EpERQ1tlP+YXKyj0hyZ70HADwG4WF0o03SlOmSH362KuC+BGCso6+ycqTJLVsHKWGkWGGqwEAH1NQIPXvLy1aZN+TfPZZezUQP8I9yjr6fKfd7dquOa1JAKigoEC64QZpyRI7JBctkv74R9NV1RpBWUc7DtkLMl/auonhSgDAh5w4YYfk0qVSVJT04YdSr16mq3IJQVlHP+UXSpJObxBuuBIA8CEjR5aH5OLFPjPBuSv8q6PYB/10rEiS1JigBIByo0dLbdva3a5+HJISLco6sSxL2b+MeG1Sn6AEEOQsy55MQJJatZK++spefNnP0aKsg8NHC3Ukv1AOh/S7uNNMlwMA5hw/bk9Dt3Bh+b4ACEmJoKyTvBN2t2v9sFBFhgXGLwQA1NqxY9K119rdrLfdJuXkmK7Ireh6rYPSZyhbn9HAcCUAYEhpSGZkSA0a2I+AxATW43IEZR3sOGg/GtImPtpwJQBgQH6+3d26erV02mnSRx9Jl11muiq3IyjrIL+gWJLUKIoZeQAEmfx8e9WPzEw7JJctk7p1M12VR3CPsg7yC+2gbBDB3xsAgsxLL9kh2bChtHx5wIakRIuyTvb/dFySdEbDCMOVAICXjRwp7dkjpaZKSUmmq/EogrIOco7bo17joiMNVwIAXpCfL4WHS2Fh9sTmU6earsgr6Hqtg6ISS5IUFuowXAkAeFhenr3Q8sCBUlGR6Wq8ihZlHRSV2GuqhYfy9waAAJaXJ119tfTZZ/ajHzt3Suefb7oqr+Edvg4KfwnKsHpcRgABKjfXbkl+9pnUqJG0cmVQhaREi7JOCot/CUpalAACUU6OHZKffy41biytWCF16mS6Kq8jKOugtOuVe5QAAk5OjpSSIn3xhR2SK1dKl1xiuiojaArVQelgHu5RAgg4//2v9O9/S02a2NPTBWlISrQo66SIrlcAgaprV3vB5dNPlzp2NF2NUQRlHTCYB0BA+ekn6eDB8sE6vXqZrcdH8A5fB9yjBBAwjhyRkpOlyy+Xtm0zXY1PIShdVOK05LRvUXKPEoB/Kw3JTZskp1MqKTFdkU/hHd5Fpa1JiXuUAPzYjz/aXaybN0uxsfaSWe3ama7Kp3CP0kWFvwrKenS9AvBHP/5otyS3bCkPybZtTVflc2gKuah0xKskhYVwGQH4mdKW5JYtUlwcIVkNWpQuKn2Gsl6IQyEhtCgB+Jl69aSICCk+3g7JNm1MV+SzCEoXlY94pTUJwA/FxNgLLh86JP3ud6ar8Wm8y7uokEdDAPibgwelV14p327UiJCsAVqULipbYovJBgD4g+xsqWdP6euvpYIC6a67TFfkN3iXd1FRcemizVxCAD4uK0vq0cMOyebNmXGnlniXd1Eh9ygB+IMDB+yQ3LZNatFCysyku7WW6Hp1EdPXAfB5pSG5fbvUsqU9uvXcc01X5XdoDrmIUa8AfNqxY+UhmZBgtyQJSZfwLu8iBvMA8Gn160vDhklnnmmH5DnnmK7Ib/Eu76JCBvMA8HUPPyx99ZV09tmmK/FrvMu7iHuUAHzO/v3SwIFSbm75vpgYc/UECAbz
uIh7lAB8yr599j3J776zl8maO9d0RQGDd3kXld2jJCgBmLZ3r3TFFXZItm4t/eUvpisKKLzLu6iwhHuUAHzAnj12SO7cad+LzMyUzjrLdFUBhXd5F5UusxXGqFcApuzebYfkrl32qNbMTHuUK9yKe5QuYjAPAKMsS7rlFjssS0OyZUvTVQUkmkMu4h4lAKMcDmnWLOmyy6RPPiEkPYgWpYsKixn1CsCA4mJ70WVJattWWrPGDk14DO/yLso9USxJio7ibw0AXrJzp9SunbRqVfk+QtLjCEoX5Z4okiRFR4YZrgRAUPjuO3vgzvbt9ow7TqfpioKG8aCcNm2aWrVqpcjISCUmJmrDhg3VHj916lSdf/75ioqKUkJCgh544AGdOHHCS9WWKyiyf0kjGPUKwNN27LBDct8+qU0backSKYT3Hm8xeqXnzZuntLQ0jR07Vps2bVKHDh2UkpKigwcPVnr8nDlzNGrUKI0dO1bbtm3TrFmzNG/ePD322GNerlyyZD9HGRJCtwcAD/rf/+yQ/P576YIL7KWy4uNNVxVUjAbllClTNHz4cA0dOlRt27bVjBkzVL9+fc2ePbvS49etW6du3bppwIABatWqla688krdcsstp2yFeoJl56Qc3B8A4CmlIbl/vz1wh5A0wlhQFhYWauPGjUpOTi4vJiREycnJWr9+faXndO3aVRs3biwLxp07d2rp0qXq3bt3ld+noKBAubm5FV7u4PwlKYlJAB4zdar0ww/ShRfaIRkXZ7qioGRsyObhw4dVUlKiuN/8j4+Li9M333xT6TkDBgzQ4cOHddlll8myLBUXF+uuu+6qtus1PT1d48aNc2vt0q9blG7/0gBgmzpVOu006cEHpdhY09UELb+6G5yZmakJEybon//8pzZt2qQFCxZoyZIlGj9+fJXnjB49Wjk5OWWvffv2uaUW5y9BGUJSAnCn/fvLR7SGhdkTnBOSRhlrUTZt2lShoaHKzs6usD87O1vxVfTBP/nkkxo0aJBuv/12SVL79u2Vn5+vO+64Q48//rhCKhkFFhERoYiICPf/AKLrFYCbbdtmL5V13XXS9OmMbPURxv4vhIeHq1OnTsrIyCjb53Q6lZGRoaSkpErPOXbs2ElhGBoaKkmySvtCvcRJ1ysAd/r6a3vgTna29MUX0tGjpivCL4xOK5OWlqYhQ4aoc+fO6tKli6ZOnar8/HwNHTpUkjR48GC1aNFC6enpkqS+fftqypQpuvjii5WYmKgdO3boySefVN++fcsC01tKg5lRrwDq7L//lXr2lA4elDp2lFaulKKjTVeFXxgNytTUVB06dEhjxoxRVlaWOnbsqGXLlpUN8Nm7d2+FFuQTTzwhh8OhJ554Qvv379cZZ5yhvn376tlnn/V67WUtSq9/ZwAB5T//sUPy0CHp4ovtkGzSxHRV+BWH5e0+S8Nyc3MVExOjnJwcRdfhL7bBszdozbeHNOnGDurfiVn7Abhg61Y7JA8fli65RFqxgpD0oprmAXeKXXSisESSFBnGJQTgop07pZ9/ljp1oiXpw1j6wkV5BfbqIQ2ZFB2Aq667zp639dJLpcaNTVeDKhCULirtsQ5lMA+A2vj3v6VGjaSzzrK3r7zSaDk4NfoNXWSVTThgtg4AfmTTJvs5yR497JVA4BcIShc5eTwEQG1s3CglJ0s//WTP2crjH36DoHRReVAaLgSA7/vyy/KQTEqSli+XYmJMV4UaIihdZDHXK4Ca+Ne/pD/+0R7d2rWrtGwZrUk/w2AeF5U+fMo9SgBV2rTJDsmcHKlbN+mjj6SGDU1XhVoiKF3EPUoAp9SypdSihdS+vbR0KSHppwhKF3GPEsApxcbaCy7Xr2+vKwm/xD1KF1nM9QqgMuvXS6+9Vr4dG0tI+jlalC4qDcpQblICKLVunXTVVfYSWU2bSn36mK4IbkCL0kWlXa+MegUgSfrsMyklRcrLs9eVvOIK0xXBTQhKF3GPEkCZtWvLW5I9e0qL
F0sNGpiuCm5CULrIyXOUACTp00/LQ7JXL+nDD+3BOwgYBKWLLLpeAezcKV19tZSfbz8vSUgGJAbzuMjJpOgAWreWRoyQtmyR3n9fiooyXRE8gKB0ERMOAJDDIU2cKBUVSeHhpquBh9D16iKnk8E8QFBatUrq1086ftzedjgIyQBHULqISdGBIJSRIV1zjfTBB9Jzz5muBl5CULqISdGBILNypR2Sx4/bEwmMGmW6IngJQemisnuUTGIHBL4VK6S+faUTJ+ywfO89KSLCdFXwEoLSRUw4AASJ5cvLQ7JvX+nddwnJIENQuqjsHiV9r0DgOnZMGjJEKiiQrruOkAxSBKWLWD0ECAL160uLFkmDB0vvvMPo1iDFc5QuYlJ0IIDl55fP1dqli/1C0KJF6aLSUa/kJBBgFi+Wzj5b+te/TFcCH0FQuojBPEAA+vBD6frrpYMHpRkzTFcDH0FQuqj8HiVJCQSERYukG26wp6O78UaCEmUISheUrhwiMeEAEBA++EDq398OydRUac4cKSzMdFXwEQSlC5zlOclgHsDfLVxYHpI33yy9+aZUj3GOKEdQuuDXLUpyEvBjliW9/LJUXCzdcov0xhuEJE5CULrg1y1KltkC/JjDYU8i8Nxz0uuvE5KoFEHpAictSsC/bd1aPiIvKkp6+GFCElUiKOuInAT8zPz50sUXS489Vh6WQDUIShdYdL0C/mnePPteZEmJdOAAQYkaIShdYInHQwC/8/bb0oABdkgOHSrNmiWF8BaIU+O3xAUVBvPQ+Qr4vjlzpD//WXI6pdtus0e6hoaargp+gqB0AY+HAH7kzTelQYPskBw2TJo5k5YkaoXfFhdwVwPwI4WFdkjefrv00kuEJGqN8dAuqDiYx1wdAGrgttuk3/1O6taNkIRL+K2pI+5RAj7ovffsFUBKde9OSMJl/OYACCyzZ9urf/TsKeXkmK4GAYCgBBA4Zs2y70ValnTFFVJ0tOmKEAAISgCBYebM8pC87z7pH/9gEAHcgqAE4P9eekm64w7745Ejpb/9jZCE2xCUAPzbm29Kd95pf3z//dLzzxOScCseDwHg3y67TDrrLOn666XJkwlJuB1BCcC/tWolbdwoNWlCSMIj6HoF4H+mTZMWLCjfPv10QhIeQ4vSFcxhB5jz97/bA3bq1ZM2b5batTNdEQIcLco64o9YwIv+9jc7JCXpoYekCy80Ww+CAkEJwD88/7w9qlWSHntMmjCBv1ThFQQlAN83ZYqUlmZ//Pjj0jPPEJLwGoISgG/7+GPpwQftj598Uho/npCEVzGYB4BvS062p6Zr0UJ66inT1SAIEZQAfJPTaS+NFRJiT1FHKxKG0PUKwPekp0upqVJRkb1NSMIgghKAb5kwwR7V+u670qJFpqsBzAfltGnT1KpVK0VGRioxMVEbNmyo9viff/5ZI0aMULNmzRQREaHzzjtPS5cu9VK1NosZBwDPeOYZe1SrZA/aueEGs/UAMnyPct68eUpLS9OMGTOUmJioqVOnKiUlRdu3b1dsbOxJxxcWFuqPf/yjYmNj9e6776pFixbas2ePGjVq5P3if0GHEOAmTz8tjR1rf/zss3arEvABRoNyypQpGj58uIYOHSpJmjFjhpYsWaLZs2dr1KhRJx0/e/ZsHTlyROvWrVNYWJgkqVWrVt4sGYAnjBtXPqI1PV2q5N8/YIqxrtfCwkJt3LhRycnJ5cWEhCg5OVnr16+v9JxFixYpKSlJI0aMUFxcnNq1a6cJEyaopKSkyu9TUFCg3NzcCi8APmTXLmniRPvjiRMJSfgcY0F5+PBhlZSUKC4ursL+uLg4ZWVlVXrOzp079e6776qkpERLly7Vk08+qcmTJ+uZZ56p8vukp6crJiam7JWQkODWnwNAHbVuLS1ebK8l+eijpqsBTmJ8ME9tOJ1OxcbG6qWXXlKnTp2Umpqqxx9/XDNmzKjynNGjRysnJ6fstW/fPi9WDKBSliUdPFi+3atX+RR1
gI8xFpRNmzZVaGiosrOzK+zPzs5WfHx8pec0a9ZM5513nkJDQ8v2XXDBBcrKylJhYWGl50RERCg6OrrCC4BBlmVPRXfRRdK2baarAU7JWFCGh4erU6dOysjIKNvndDqVkZGhpKSkSs/p1q2bduzYIafTWbbv22+/VbNmzRQeHu7xmgHUkWXZj388+6yUnS2tWWO6IuCUjHa9pqWlaebMmXrttde0bds23X333crPzy8bBTt48GCNHj267Pi7775bR44c0ciRI/Xtt99qyZIlmjBhgkaMGGHqRwBQU5ZlP/KRnm5v/+1v0p13mq0JqAGjj4ekpqbq0KFDGjNmjLKystSxY0ctW7asbIDP3r17FRJSnuUJCQlavny5HnjgAV100UVq0aKFRo4cqUcZAAD4NsuyR7M+95y9/fe/S/fdZ7YmoIYclmUF1TQzubm5iomJUU5Ojsv3K3/KL9TF41dIknY8e7XqhfrVmCjAuyxLeuQRadIke/uFFyR6geADapoHrB5SRw4mawaqd+KE9Omn9sfTpkn33GO2HqCWCEoAnhUVJS1fbi/AfOONpqsBao0+QwDuZ1nSqlXl2zExhCT8FkEJwL0sSxo50p5EYPJk09UAdUbXKwD3sSzp//7PHrDjcEgGV/YB3IWgBOAeliXde6/0z3/aIfnyy9Jtt5muCqgzghJA3TmddkhOn26H5OzZ0q23mq4KcAuCEkDdWJb9XOSMGXZIvvKKNGSI6aoAtyEoAdSNwyGdc44UEiK9+qo0aJDpigC3IihdEFRTGQE18dBDUu/eUtu2pisB3I7HQ+qIeXkQlJxOaeJEKSenfB8hiQBFUAKoHadTuv12afRoqU8fexsIYAQlgJorKZGGDbMH7ISE2CNdQ3gbQWDjHiWAmikpsZ+LfP11KTRUeustKTXVdFWAxxGUAE6tpEQaOlR64w07JN9+m7lbETQISgCn9uCD5SE5d67Uv7/pigCv4eYCgFO74w6pZUtp3jxCEkHHbUG5YMECXXTRRe76cgB8Sdu20rffSjfcYLoSwOtqFZQvvvii+vfvrwEDBuiLL76QJK1atUoXX3yxBg0apG7dunmkSABeVlxsD9zJyCjfFxVlrh7AoBoH5cSJE3Xfffdp9+7dWrRokXr27KkJEyZo4MCBSk1N1ffff6/p06d7slafYVnMzYMAVlwsDRxoPwJyww3Szz+brggwqsaDeV555RXNnDlTQ4YM0aeffqrLL79c69at044dO9SgQQNP1ujTHEzNg0BSVGSH5Pz5UliY/SgIa0oiyNU4KPfu3auePXtKkrp3766wsDCNGzcuqEMSCChFRdItt0jvvSeFh9v/veYa01UBxtU4KAsKChQZGVm2HR4eriZNmnikKABeVlgo3XyztHChHZILFtjT0wGo3XOUTz75pOrXry9JKiws1DPPPKOYmJgKx0yZMsV91QHwjmnTykNy4UJ7JRAAkmoRlH/4wx+0ffv2su2uXbtq586dFY5xcMMO8E/33itt3Gjfn7z6atPVAD6lxkGZmZnpwTIAeF1hoVSvnj2peViY9OabpisCfFKtul5zc3P1xRdfqLCwUF26dNEZZ5zhqboAeFJBgT1Xa3y8NGMGK4AA1ahxUG7ZskW9e/dWVlaWJKlhw4Z65513lJKS4rHiAHhAQYE9Dd3ixVJkpDRypHThhaarAnxWjf+MfPTRR9W6dWt99tln2rhxo3r16qV7773Xk7X5LKYbgN86cUK6/vrykPzwQ0ISOIUatyg3btyojz/+WJdccokkafbs2WrSpIlyc3MVHR3tsQJ9HQOY4DdKQ/Kjj+zp6D78UOrVy3RVgM+rcYvyyJEjatmyZdl2o0aN1KBBA/34448eKQyAG504If3pT+UhuWQJIQnUUK0G83z99ddl9ygle87Tbdu2KS8vr2wfK4gAPmjDBmnlSql+fTskr7jCdEWA36hVUPbq1eukCcGvueYaORwOWZYlh8OhkpIStxYIwA3+8Ad7
LcnTT5cuv9x0NYBfqXFQ7tq1y5N1AHC3Y8ekI0fsBZcl+/4kgFqrcVC+9tpreuihh8qmsAPgw44dk669VvruOykzUzrrLNMVAX6rxoN5xo0bp6NHj3qyFgDucOyY1Levvejy4cPSDz+YrgjwazUOShYrBvxAfr69NNaqVVLDhtLy5VJSkumqAL9Wq8E8PDMI+LD8fHtprE8+ISQBN6pVUJ533nmnDMsjR47UqSB/QOMaPufoUTsk16yRoqPtkPz9701XBQSEWgXluHHjTlp/EoAPOH5c+vFHOyQ//lhKTDRdERAwahWUN998s2JjYz1VCwBXnXGGfV9y3z6pUyfT1QABpcaDebg/CfiY3Fzpgw/Kt2NjCUnAAxj1Cvij3Fzpqqvs+Vtff910NUBAq3HXq9Pp9GQdAGoqJ8cOyc8/lxo3ZpkswMNqdY8SgGE5OVJKivTFF3ZIrlwp/bL0HQDPqHHXKwDDfv5ZuvJKOySbNLFn3iEkAY+jRQn4g+PH7ZD817/KQ7JjR9NVAUGBFiXgDyIjpZ497WWyVq0iJAEvIihdYIkRwPAyh0NKT5e++krq0MF0NUBQISjrgEdL4VFHjkj33293u0r2L1zz5kZLAoIR9ygBX/Tjj1JysrRli71U1ptvmq4ICFq0KAFfc/iw1KuXHZKxsdLo0aYrAoIaLUrAl5SG5FdfSXFx9sCdtm1NVwUENYIS8BWHDtkhuXWrHZKrV0sXXGC6KiDo0fUK+ALLkm64wQ7J+HgpM5OQBHwEQQn4AodDmjzZDsfMTKlNG9MVAfgFXa+ASZZV/pzRpZfaLcrQULM1AaiAFiVgSna2dNll9rR0pQhJwOcQlK5gYh7UVVaW1KOHtG6ddNttEsvYAT7LJ4Jy2rRpatWqlSIjI5WYmKgNGzbU6Ly5c+fK4XCoX79+ni2wCkzMA5ccOGCH5LZtUsuW0sKFUohP/FMEUAnj/zrnzZuntLQ0jR07Vps2bVKHDh2UkpKigwcPVnve7t279dBDD6l79+5eqhRwg9KQ/OYbKSHBHrhz7rmmqwJQDeNBOWXKFA0fPlxDhw5V27ZtNWPGDNWvX1+zZ8+u8pySkhINHDhQ48aN09lnn+3FaoE6+OEH6YorpO3bpTPPtEPynHNMVwXgFIwGZWFhoTZu3Kjk5OSyfSEhIUpOTtb69eurPO/pp59WbGyshg0bdsrvUVBQoNzc3AovwIinn5a+/VY66yw7JPkjD/ALRh8POXz4sEpKShQXF1dhf1xcnL755ptKz1m7dq1mzZqlLVu21Oh7pKena9y4cXUtFai755+XCgulMWOkVq1MVwOghox3vdZGXl6eBg0apJkzZ6pp06Y1Omf06NHKyckpe+3bt8/DVQK/8vPP9rOSkhQVJc2eTUgCfsZoi7Jp06YKDQ1VdnZ2hf3Z2dmKj48/6fjvvvtOu3fvVt++fcv2OX8ZVl+vXj1t375d5/zmnk9ERIQiIiI8UD1wCnv32gN3brzRXnSZBUwBv2S0RRkeHq5OnTopIyOjbJ/T6VRGRoaSkpJOOr5NmzbaunWrtmzZUva69tpr1aNHD23ZskUJCQneLB+o2t699sCdnTuld96xW5YA/JLxKezS0tI0ZMgQde7cWV26dNHUqVOVn5+voUOHSpIGDx6sFi1aKD09XZGRkWrXrl2F8xs1aiRJJ+33JOYbQLX27LFbkrt22QN2MjOlxo1NVwXARcaDMjU1VYcOHdKYMWOUlZWljh07atmyZWUDfPbu3asQH30Y20FXGn5r9247JHfvth/9yMy0JxUA4LcclmUFVQMpNzdXMTExysnJUXR0tEtfIzv3hBInZCg0xKHvJvR2c4XwW7t3292te/bYkwhkZkotWhguCkBVapoHvtlUA/zR55/b9yZ/9ztCEgggxrtegYBx8832yNbu3aXmzU1XA8BNCEqgLnbulBo0kEonzUhNNVsPALej6xVw1Y4d0uWXS716SaeYxB+A/yIoAVf873/2wJ3v
v7fXkmQ9SSBgEZRAbX37rR2S+/dLbdtKq1dLlcwkBSAwEJRAbWzfbofkDz9IF14orVpVfn8SQEAiKF0QXE+eosz27fZkAgcOSO3aEZJAkCAo64B5eYJMZKQUESG1b2+HZGys6YoAeAGPhwA1Vbrgcv360hlnmK4GgJfQogSq8/XX0gcflG+fdRYhCQQZghKoyn//aw/c6d9f+vhj09UAMISgBCrzn//YA3cOHbLvSXbubLoiAIYQlMBvbd1aHpKXXCKtXCk1aWK6KgCGEJTAr331lR2Shw9LnToRkgAISqDMnj1Sz57Sjz/aXa0rV0qNG5uuCoBhPB4ClEpIkP70J+nf/7YH7zRqZLoiAD6AoHSBJabmCUghIdKLL0rHjkmnnWa6GgA+gq7XOnAwNY//27RJGj5cKiqyt0NCCEkAFdCiRPDauFFKTpZ+/llq0UJ66inTFQHwQbQoEZy+/LI8JLt2ldLSTFcEwEcRlAg+//pXeUh26yYtWyZFR5uuCoCPIigRXL74wg7JnBzpssukjz6SGjY0XRUAH0ZQIngcPy716yfl5krduxOSAGqEoETwiIqS3nxTuuoqaelSRrcCqBFGvSLwFRdL9X75Ve/Vy559h2d7ANQQLUoEts8+ky64wF5XshQhCaAWCEoXWEzM4x/WrrW7WXfskMaPN10NAD9FUNaBQ7RMfNann9ohefSo3d06a5bpigD4KYISgWfNGunqq6X8fPtRkA8/lOrXN10VAD9FUCKwfPJJeUj+8Y/SokX2aFcAcBFBicBhWfa9yGPHpJQU6YMPCEkAdUZQInA4HNJ770kPPyy9/z4hCcAtCEr4v337yj+OiZGee06KjDRXD4CAQlDCv61cKZ1/vjRpkulKAAQoghL+a8UKqW9few7XTz6RSkpMVwQgABGULmC+AR+wfLkdkidO2P99910pNNR0VQACEEFZF8w3YMayZdJ110kFBdK119ohGRFhuioAAYqghH/56CN7qayCAjss58+XwsNNVwUggBGU8C/bt9sh+ac/Se+8Q0gC8DiW2YJ/uf9+qVUrqU8fKSzMdDUAggAtSvi+1aulnJzy7X79CEkAXkNQwrctWmRPR5eSIuXlma4GQBAiKOG7PvhA6t9fKiqyu1uZkg6AAQQlfNPCheUhefPN0ptvSvW4pQ7A+whK+J4FC6SbbpKKi6UBA6Q33iAkARhDULrAspibx2M++EBKTbVDcuBA6fXXCUkARvEOVAdMzOMB550nNWliD9555RWmpQNgHEEJ33LBBdK//iW1aEFIAvAJdL3CvHnzpIyM8u0zzyQkAfgMWpQw6+23pT//2Z7UfMMGqV070xUBQAW0KGHOnDl2SDqd9ujWtm1NVwQAJyEoYcabb0qDBtkhefvt0ksvSSH8OgLwPbwzwfveeEMaPNgOyeHDpRdfJCQB+CzeneBdq1dLQ4ZIliXdeac0YwYhCcCnMZgH3nXZZdINN0hnnCG98AIhCcDnEZQuYGKeOggLs0e6hoQQkgD8Au9UdeBgap6amTXL7mZ1Ou3tevUISQB+gxYlPGvmTOmOO+yPe/a053EFAD/iE3/WT5s2Ta1atVJkZKQSExO1YcOGKo+dOXOmunfvrsaNG6tx48ZKTk6u9ngY9NJL5SH5f/9nrwgCAH7GeFDOmzdPaWlpGjt2rDZt2qQOHTooJSVFBw8erPT4zMxM3XLLLVq9erXWr1+vhIQEXXnlldq/f7+XK0e1XnzR7m6VpJEjpalT6asG4JccluE1oxITE3XppZfqhRdekCQ5nU4lJCTovvvu06hRo055fklJiRo3bqwXXnhBgwcPPuXxubm5iomJUU5OjqKjo12qed+RY+r+3GpFhoXom/FXu/Q1Atr06dI999gfP/CANHkyIQnA59Q0D4y2KAsLC7Vx40YlJyeX7QsJCVFycrLWr19fo69x7NgxFRUVqUmTJpV+vqCgQLm5uRVe8KDdu+0WpCSlpRGSAPye0aA8fPiwSkpK
FBcXV2F/XFycsrKyavQ1Hn30UTVv3rxC2P5aenq6YmJiyl4JCQl1rhvVaNVKmjtXevRRadIkQhKA3zN+j7IuJk6cqLlz52rhwoWKjIys9JjRo0crJyen7LVv3z4vVxkk8vPLP77+emniREISQEAwGpRNmzZVaGiosrOzK+zPzs5WfHx8tedOmjRJEydO1Mcff6yLLrqoyuMiIiIUHR1d4QU3+/vf7eWx9uwxXQkAuJ3RoAwPD1enTp2U8atFe51OpzIyMpSUlFTlec8995zGjx+vZcuWqXPnzt4oFVWZOtW+J7l7tzR/vulqAMDtjE84kJaWpiFDhqhz587q0qWLpk6dqvz8fA0dOlSSNHjwYLVo0ULp6emSpL/85S8aM2aM5syZo1atWpXdyzzttNN02mmnebV2h4K8a/H55+0BO5L0+OPSgw+arQcAPMB4UKampurQoUMaM2aMsrKy1LFjRy1btqxsgM/evXsV8qvpzqZPn67CwkL179+/wtcZO3asnnrqKW+WHtwmT5Yeesj++IknpKef5p4kgIBk/DlKb3Pnc5RRYaHaNv4qN1foByZNkh5+2P54zBjpqacISQB+xy+eo4QfOn5cevVV++OxY6Vx4whJAAHNeNcr/ExUlLRqlfTee9Ldd5uuBgA8jhYlaubf/y7/ODaWkAQQNAhKnNqzz0odO9rrSgJAkCEoUb3x4+1RrZJUxYouABDICEoXBM044XHj7FGtkpSeLo0ebbYeADCAwTx1ENCDPZ96yg5KSfrLX6RHHjFaDgCYQlCiIsuyQ/Lpp+3t554rf2YSAIIQQYmTFRXZ/500iWnpAAQ9ghIVORz2KNfevaXLLjNdDQAYx2Ae2N2ts2fbs+5IdlgSkgAgiaCEZdmjWYcNk/r1k0pKTFcEAD6FrtdgZlnSo49Kf/2rvd23rxQaarYmAPAxBGWwsiz7kY9Jk+ztF16QRowwWxMA+CCCMhhZlr2W5JQp9va0adI995itCQB8FEHpAkt+PjXPmDHlITl9unTXXWbrAQAfxmCeOvDbiXmuvVZq1Eh68UVCEgBOgRZlMLr0UmnHDun0001XAgA+jxZlMLAsadQoacOG8n2EJADUCEEZ6CxLuvdee2Lzq66SfvrJdEUA4FcIykDmdNqPfPzzn/ZsO5MnS40bm64KAPwK9ygDldNpP/Lx4ot2SL7yijRkiOmqAMDvEJSByOm0R7POnGmH5KuvSoMHm64KAPwSQRmIpk2zQzIkRHrtNenPfzZdEQD4LYIyEN1+u7R0qR2QAweargYA/BpB6QLLFyfmcTrtblaHQ4qKsoPS4bdTIgCAz2DUax04fCWISkrsZbJGjy5PcV+pDQD8HC1Kf1cakq+9Zi+RdcstUocOpqsCgIBBUPqzkhJp6FDpjTfskJwzh5AEADcjKP1VSYl0663Sm2/aITl3rtS/v+mqACDgEJT+qLjYnjxgzhypXj07JG+4wXRVABCQCEp/tHat9PbbdkjOmyddf73pigAgYBGU/uiKK6SXX7bnbf3Tn0xXAwABjaD0F8XFUk5O+fJYt91mth4ACBI8R+kPioqkAQOkP/xBys42XQ0ABBValC7w6sQ8RUX2s5HvvSeFh0tbt0pxcd6sAACCGkFZBx6f+6aoSLr5ZmnBAjskFyyQkpM9/V0BAL9CUPqqwkI7JBcutENy4UKpd2/TVQFA0CEofVFhoXTTTdIHH0gREdL770tXXWW6KgAISgSlL/rxR+mrr+yQ/OADKSXFdEUAELQISl/UrJm0erW0Y4fUq5fpagAgqPF4iK8oKJDWrCnfPussQhIAfABB6QtOnLCnoevVyx60AwDwGQSlaaUhuXSpFBYmxcSYrggA8Cvco3SBZblpyoETJ+y5Wpctk6KipMWLpZ493fO1AQBuQVDWRV1mHDh+XOrXT/r4Y6l+fWnJEnuycwCATyEoTSgokK67Tlqxwg7JpUulyy83XRUAoBLcozQhLExq3Vpq0ED66CNCEgB8
GEFpQkiINH269OWX9oogAACfRVB6S36+9Oyz9kTnkh2WbdqYrQkAcErco/SG/HzpmmukzEzpu++k2bNNVwQAqCGC0tOOHpX69LFn3WnYUBo+3HRFAIBaICg96ehRe2msTz+VoqOl5cul3//edFUAgFogKD0lL88OybVr7ZD8+GMpMdF0VQCAWmIwjwtOOS+PZUn9+9shGRNjPy9JSAKAXyIo66DKiXkcDunRR+3lslaskLp08WZZAAA3ouvVU3r2tEe4RkWZrgQAUAe0KN0lJ8eeu/Xrr8v3EZIA4PdoUbpDTo6UkiJ98YX07bfS1q1SaKjpqgAAbuATLcpp06apVatWioyMVGJiojZs2FDt8fPnz1ebNm0UGRmp9u3ba+nSpV6qtBI//yxdeaUdkk2aSG+9RUgCQAAxHpTz5s1TWlqaxo4dq02bNqlDhw5KSUnRwYMHKz1+3bp1uuWWWzRs2DBt3rxZ/fr1U79+/fSf//zHy5XLHt165ZXShg3S6adLq1ZJF1/s/ToAAB7jsNy2CrFrEhMTdemll+qFF16QJDmdTiUkJOi+++7TqFGjTjo+NTVV+fn5Wrx4cdm+3//+9+rYsaNmzJhxyu+Xm5urmJgY5eTkKDo62qWavzt0VL0mf6LoouP6asqNdkhmZEgdOrj09QAA3lfTPDDaoiwsLNTGjRuVnJxcti8kJETJyclav359peesX7++wvGSlJKSUuXxBQUFys3NrfBym5ISqWlTuyVJSAJAQDIalIcPH1ZJSYni4uIq7I+Li1NWVlal52RlZdXq+PT0dMXExJS9EhIS3FO8ZK8ruWqVdNFF7vuaAACfEvCjXkePHq20tLSy7dzc3DqHZbOYSL11e6JCHA7pnNPrWiIAwIcZDcqmTZsqNDRU2dnZFfZnZ2crPj6+0nPi4+NrdXxERIQiIiLcU/Av6ofXU7dzm7r1awIAfJPRrtfw8HB16tRJGRkZZfucTqcyMjKUlJRU6TlJSUkVjpekFStWVHk8AAB1YbzrNS0tTUOGDFHnzp3VpUsXTZ06Vfn5+Ro6dKgkafDgwWrRooXS09MlSSNHjtTll1+uyZMnq0+fPpo7d66+/PJLvfTSSyZ/DABAgDIelKmpqTp06JDGjBmjrKwsdezYUcuWLSsbsLN3716FhJQ3fLt27ao5c+boiSee0GOPPabf/e53ev/999WuXTtTPwIAIIAZf47S29zxHCUAwP/5xXOUAAD4OoISAIBqEJQAAFSDoAQAoBoEJQAA1SAoAQCoBkEJAEA1CEoAAKpBUAIAUA2CEgCAahif69XbSmfsy83NNVwJAMCk0hw41UyuQReUeXl5klTnxZsBAIEhLy9PMTExVX4+6CZFdzqd+uGHH9SwYUM5HA6Xv05ubq4SEhK0b98+Jlf/Fa5L1bg2leO6VI1rUzl3XRfLspSXl6fmzZtXWKXqt4KuRRkSEqKWLVu67etFR0fzC1wJrkvVuDaV47pUjWtTOXdcl+pakqUYzAMAQDUISgAAqkFQuigiIkJjx45VRESE6VJ8CtelalybynFdqsa1qZy3r0vQDeYBAKA2aFECAFANghIAgGoQlAAAVIOgBACgGgRlNaZNm6ZWrVopMjJSiYmJ2rBhQ7XHz58/X23atFFkZKTat2+vpUuXeqlS76rNdZk5c6a6d++uxo0bq3HjxkpOTj7ldfRntf2dKTV37lw5HA7169fPswUaUtvr8vPPP2vEiBFq1qyZIiIidN555/Hv6RdTp07V+eefr6ioKCUkJOiBBx7QiRMnvFStd6xZs0Z9+/ZV8+bN5XA49P7775/ynMzMTF1yySWKiIjQueeeq1dffdV9BVmo1Ny5c63w8HBr9uzZ1n//+19r+PDhVqNGjazs7OxKj//ss8+s0NBQ67nnnrO+/vpr64knnrDCwsKsrVu3erlyz6rtdRkwYIA1bdo0a/Pmzda2bdusW2+91YqJibG+//57L1fuebW9NqV27dpltWjRwure
vbt13XXXeadYL6rtdSkoKLA6d+5s9e7d21q7dq21a9cuKzMz09qyZYuXK/e82l6bt956y4qIiLDeeusta9euXdby5cutZs2aWQ888ICXK/espUuXWo8//ri1YMECS5K1cOHCao/fuXOnVb9+fSstLc36+uuvrX/84x9WaGiotWzZMrfUQ1BWoUuXLtaIESPKtktKSqzmzZtb6enplR5/0003WX369KmwLzEx0brzzjs9Wqe31fa6/FZxcbHVsGFD67XXXvNUica4cm2Ki4utrl27Wi+//LI1ZMiQgAzK2l6X6dOnW2effbZVWFjorRKNqe21GTFihNWzZ88K+9LS0qxu3bp5tE6TahKUjzzyiHXhhRdW2JeammqlpKS4pQa6XitRWFiojRs3Kjk5uWxfSEiIkpOTtX79+krPWb9+fYXjJSklJaXK4/2RK9flt44dO6aioiI1adLEU2Ua4eq1efrppxUbG6thw4Z5o0yvc+W6LFq0SElJSRoxYoTi4uLUrl07TZgwQSUlJd4q2ytcuTZdu3bVxo0by7pnd+7cqaVLl6p3795eqdlXefr9N+gmRa+Jw4cPq6SkRHFxcRX2x8XF6Ztvvqn0nKysrEqPz8rK8lid3ubKdfmtRx99VM2bNz/pl9rfuXJt1q5dq1mzZmnLli1eqNAMV67Lzp07tWrVKg0cOFBLly7Vjh07dM8996ioqEhjx471Rtle4cq1GTBggA4fPqzLLrtMlmWpuLhYd911lx577DFvlOyzqnr/zc3N1fHjxxUVFVWnr0+LEl4zceJEzZ07VwsXLlRkZKTpcozKy8vToEGDNHPmTDVt2tR0OT7F6XQqNjZWL730kjp16qTU1FQ9/vjjmjFjhunSjMvMzNSECRP0z3/+U5s2bdKCBQu0ZMkSjR8/3nRpAY0WZSWaNm2q0NBQZWdnV9ifnZ2t+Pj4Ss+Jj4+v1fH+yJXrUmrSpEmaOHGiVq5cqYsuusiTZRpR22vz3Xffaffu3erbt2/ZPqfTKUmqV6+etm/frnPOOcezRXuBK78zzZo1U1hYmEJDQ8v2XXDBBcrKylJhYaHCw8M9WrO3uHJtnnzySQ0aNEi33367JKl9+/bKz8/XHXfcoccff7zaNRUDWVXvv9HR0XVuTUq0KCsVHh6uTp06KSMjo2yf0+lURkaGkpKSKj0nKSmpwvGStGLFiiqP90euXBdJeu655zR+/HgtW7ZMnTt39kapXlfba9OmTRtt3bpVW7ZsKXtde+216tGjh7Zs2aKEhARvlu8xrvzOdOvWTTt27Cj7w0GSvv32WzVr1ixgQlJy7docO3bspDAs/YPCCuJpuz3+/uuWIUEBaO7cuVZERIT16quvWl9//bV1xx13WI0aNbKysrIsy7KsQYMGWaNGjSo7/rPPPrPq1atnTZo0ydq2bZs1duzYgH08pDbXZeLEiVZ4eLj17rvvWgcOHCh75eXlmfoRPKa21+a3AnXUa22vy969e62GDRta9957r7V9+3Zr8eLFVmxsrPXMM8+Y+hE8prbXZuzYsVbDhg2tt99+29q5c6f18ccfW+ecc4510003mfoRPCIvL8/avHmztXnzZkuSNWXKFGvz5s3Wnj17LMuyrFGjRlmDBg0qO7708ZCHH37Y2rZtmzVt2jQeD/GWf/zjH9aZZ55phYeHW126dLE+//zzss9dfvnl1pAhQyoc/84771jnnXeeFR4ebl144YXWkiVLvFyxd9Tmupx11lmWpJNeY8eO9X7hXlDb35lfC9SgtKzaX5d169ZZiYmJVkREhHX22Wdbzz77rFVcXOzlqr2jNtemqKjIeuqpp6xzzjnHioyMtBISEqx77rnH+umnn7xfuAetXr260veN0msxZMgQ6/LLLz/pnI4dO1rh4eHW2Wefbb3yyituq4dltgAAqAb3KAEAqAZBCQBANQhKAACqQVACAFANghIAgGoQlAAAVIOgBACgGgQlAADVICiBAHHrrbfK4XCc9Nqx
Y0eFz4WHh+vcc8/V008/reLiYkn2qhS/PueMM85Q7969tXXrVsM/FWAeQQkEkKuuukoHDhyo8GrdunWFz/3vf//Tgw8+qKeeekp//etfK5y/fft2HThwQMuXL1dBQYH69OmjwsJCEz8K4DMISiCAREREKD4+vsKrdHWJ0s+dddZZuvvuu5WcnKxFixZVOD82Nlbx8fG65JJLdP/992vfvn01XpQbCFQEJRCkoqKiqmwt5uTkaO7cuZIUUEtbAa5g4WYggCxevFinnXZa2fbVV1+t+fPnVzjGsixlZGRo+fLluu+++yp8rmXLlpKk/Px8SdK1116rNm3aeLhqwLcRlEAA6dGjh6ZPn1623aBBg7KPS0O0qKhITqdTAwYM0FNPPVXh/E8//VT169fX559/rgkTJmjGjBneKh3wWQQlEEAaNGigc889t9LPlYZoeHi4mjdvrnr1Tv7n37p1azVq1Ejnn3++Dh48qNTUVK1Zs8bTZQM+jXuUQJAoDdEzzzyz0pD8rREjRug///mPFi5c6IXqAN9FUAKoVP369TV8+HCNHTtWrO+OYEZQAqjSvffeq23btp00IAgIJg6LPxUBAKgSLUoAAKpBUAIAUA2CEgCAahCUAABUg6AEAKAaBCUAANUgKAEAqAZBCQBANQhKAACqQVACAFANghIAgGoQlAAAVOP/AbGCjsgNmWMpAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 500x500 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from pyspark.ml.feature import MinMaxScaler\n",
    "from pyspark.ml.feature import VectorAssembler\n",
    "from pyspark.ml.classification import LogisticRegression\n",
    "from pyspark.ml.evaluation import BinaryClassificationEvaluator\n",
    "from pyspark.ml import Pipeline\n",
    "import pyspark.ml.feature as MF\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "\n",
    "imputer = MF.Imputer(strategy=\"mean\",inputCols=[\"calories\",\"protein\",\"fat\",\"sodium\"],\n",
    "                     outputCols=[\"calories_i\",\"protein_i\",\"fat_i\",\"sodium_i\"])\n",
    "\n",
    "continuous_assembler = MF.VectorAssembler(inputCols=[\"rating\",\"calories_i\",\"protein_i\",\"fat_i\",\"sodium_i\"],\n",
    "                                         outputCol = \"continuous\")\n",
    "\n",
    "continuous_scaler = MF.MinMaxScaler(inputCol=\"continuous\",outputCol=\"continuous_scaled\")\n",
    "\n",
    "preml_assembler = MF.VectorAssembler(inputCols=BINARY_COLUMNS + [\"continuous_scaled\"] + [\"protein_ratio\",\"fat_ratio\"],\n",
    "                                    outputCol = \"features\")\n",
    "\n",
    "lr = LogisticRegression(featuresCol=\"features\",labelCol=\"dessert\",predictionCol=\"prediction\")\n",
    "\n",
    "food_pipeline = Pipeline(stages = [extreme_value_capper_cal,extreme_value_capper_pro,extreme_value_capper_fat,extreme_value_capper_sod,\n",
    "                                   imputer,continuous_assembler,continuous_scaler,preml_assembler,lr])\n",
    "\n",
    "\n",
    "train,test = food.randomSplit([0.7,0.3],13)\n",
    "# train.cache()\n",
    "food_pipeline_model = food_pipeline.fit(train)\n",
    "results = food_pipeline_model.transform(test)\n",
    "\n",
    "evaluator = BinaryClassificationEvaluator(labelCol=\"dessert\",rawPredictionCol =\"rawPrediction\", metricName = \"areaUnderROC\")\n",
    "accuracy = evaluator.evaluate(results)\n",
    "print(f\"Area under ROC = {accuracy}\")\n",
    "\n",
    "lr_model = food_pipeline_model.stages[-1]\n",
    "plt.figure(figsize=(5,5))\n",
    "plt.plot([0,1],[0,1],'r--')\n",
    "plt.plot(lr_model.summary.roc.select(\"FPR\").collect(),lr_model.summary.roc.select(\"TPR\").collect())\n",
    "plt.xlabel(\"FPR\")\n",
    "plt.ylabel(\"TPR\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "95c5ebc1-c480-4e77-86a8-7f243856b639",
   "metadata": {},
   "outputs": [
    {
     "ename": "Py4JJavaError",
     "evalue": "An error occurred while calling o2738.saveAsTextFile.\n: java.lang.RuntimeException: java.io.FileNotFoundException: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset. -see https://wiki.apache.org/hadoop/WindowsProblems\r\n\tat org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:735)\r\n\tat org.apache.hadoop.util.Shell.getSetPermissionCommand(Shell.java:270)\r\n\tat org.apache.hadoop.util.Shell.getSetPermissionCommand(Shell.java:286)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:978)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.mkOneDirWithMode(RawLocalFileSystem.java:660)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:700)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:699)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:699)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:699)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)\r\n\tat org.apache.hadoop.fs.ChecksumFileSystem.mkdirs(ChecksumFileSystem.java:788)\r\n\tat org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.setupJob(FileOutputCommitter.java:356)\r\n\tat org.apache.hadoop.mapred.FileOutputCommitter.setupJob(FileOutputCommitter.java:131)\r\n\tat org.apache.hadoop.mapred.OutputCommitter.setupJob(OutputCommitter.java:265)\r\n\tat org.apache.spark.internal.io.HadoopMapReduceCommitProtocol.setupJob(HadoopMapReduceCommitProtocol.scala:188)\r\n\tat 
org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:79)\r\n\tat org.apache.spark.rdd.PairRDDFunctions.$anonfun$saveAsHadoopDataset$1(PairRDDFunctions.scala:1091)\r\n\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)\r\n\tat org.apache.spark.rdd.RDD.withScope(RDD.scala:410)\r\n\tat org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1089)\r\n\tat org.apache.spark.rdd.PairRDDFunctions.$anonfun$saveAsHadoopFile$4(PairRDDFunctions.scala:1062)\r\n\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)\r\n\tat org.apache.spark.rdd.RDD.withScope(RDD.scala:410)\r\n\tat org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:1027)\r\n\tat org.apache.spark.rdd.PairRDDFunctions.$anonfun$saveAsHadoopFile$3(PairRDDFunctions.scala:1009)\r\n\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)\r\n\tat org.apache.spark.rdd.RDD.withScope(RDD.scala:410)\r\n\tat org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:1008)\r\n\tat org.apache.spark.rdd.PairRDDFunctions.$anonfun$saveAsHadoopFile$2(PairRDDFunctions.scala:965)\r\n\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)\r\n\tat 
org.apache.spark.rdd.RDD.withScope(RDD.scala:410)\r\n\tat org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:963)\r\n\tat org.apache.spark.rdd.RDD.$anonfun$saveAsTextFile$2(RDD.scala:1623)\r\n\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)\r\n\tat org.apache.spark.rdd.RDD.withScope(RDD.scala:410)\r\n\tat org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1623)\r\n\tat org.apache.spark.rdd.RDD.$anonfun$saveAsTextFile$1(RDD.scala:1609)\r\n\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)\r\n\tat org.apache.spark.rdd.RDD.withScope(RDD.scala:410)\r\n\tat org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1609)\r\n\tat org.apache.spark.api.java.JavaRDDLike.saveAsTextFile(JavaRDDLike.scala:564)\r\n\tat org.apache.spark.api.java.JavaRDDLike.saveAsTextFile$(JavaRDDLike.scala:563)\r\n\tat org.apache.spark.api.java.AbstractJavaRDDLike.saveAsTextFile(JavaRDDLike.scala:45)\r\n\tat java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103)\r\n\tat java.base/java.lang.reflect.Method.invoke(Method.java:580)\r\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\r\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)\r\n\tat py4j.Gateway.invoke(Gateway.java:282)\r\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\r\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\r\n\tat py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)\r\n\tat py4j.ClientServerConnection.run(ClientServerConnection.java:106)\r\n\tat 
java.base/java.lang.Thread.run(Thread.java:1570)\r\nCaused by: java.io.FileNotFoundException: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset. -see https://wiki.apache.org/hadoop/WindowsProblems\r\n\tat org.apache.hadoop.util.Shell.fileNotFoundException(Shell.java:547)\r\n\tat org.apache.hadoop.util.Shell.getHadoopHomeDir(Shell.java:568)\r\n\tat org.apache.hadoop.util.Shell.getQualifiedBin(Shell.java:591)\r\n\tat org.apache.hadoop.util.Shell.<clinit>(Shell.java:688)\r\n\tat org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:79)\r\n\tat org.apache.hadoop.conf.Configuration.getTimeDurationHelper(Configuration.java:1907)\r\n\tat org.apache.hadoop.conf.Configuration.getTimeDuration(Configuration.java:1867)\r\n\tat org.apache.hadoop.conf.Configuration.getTimeDuration(Configuration.java:1840)\r\n\tat org.apache.hadoop.util.ShutdownHookManager.getShutdownTimeout(ShutdownHookManager.java:183)\r\n\tat org.apache.hadoop.util.ShutdownHookManager$HookEntry.<init>(ShutdownHookManager.java:207)\r\n\tat org.apache.hadoop.util.ShutdownHookManager.addShutdownHook(ShutdownHookManager.java:304)\r\n\tat org.apache.spark.util.SparkShutdownHookManager.install(ShutdownHookManager.scala:181)\r\n\tat org.apache.spark.util.ShutdownHookManager$.shutdownHooks$lzycompute(ShutdownHookManager.scala:50)\r\n\tat org.apache.spark.util.ShutdownHookManager$.shutdownHooks(ShutdownHookManager.scala:48)\r\n\tat org.apache.spark.util.ShutdownHookManager$.addShutdownHook(ShutdownHookManager.scala:153)\r\n\tat org.apache.spark.util.ShutdownHookManager$.<init>(ShutdownHookManager.scala:58)\r\n\tat org.apache.spark.util.ShutdownHookManager$.<clinit>(ShutdownHookManager.scala)\r\n\tat org.apache.spark.util.Utils$.createTempDir(Utils.scala:242)\r\n\tat org.apache.spark.util.SparkFileUtils.createTempDir(SparkFileUtils.scala:103)\r\n\tat org.apache.spark.util.SparkFileUtils.createTempDir$(SparkFileUtils.scala:102)\r\n\tat 
org.apache.spark.util.Utils$.createTempDir(Utils.scala:94)\r\n\tat org.apache.spark.deploy.SparkSubmit.prepareSubmitEnvironment(SparkSubmit.scala:372)\r\n\tat org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:964)\r\n\tat org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:194)\r\n\tat org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:217)\r\n\tat org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:91)\r\n\tat org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1120)\r\n\tat org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1129)\r\n\tat org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)\r\nCaused by: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset.\r\n\tat org.apache.hadoop.util.Shell.checkHadoopHomeInner(Shell.java:467)\r\n\tat org.apache.hadoop.util.Shell.checkHadoopHome(Shell.java:438)\r\n\tat org.apache.hadoop.util.Shell.<clinit>(Shell.java:515)\r\n\t... 25 more\r\n",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mPy4JJavaError\u001b[0m                             Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[19], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mfood_pipeline_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfood_pipeline.model\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mD:\\AppData\\Local\\mambaforge\\envs\\python39ml\\lib\\site-packages\\pyspark\\ml\\util.py:262\u001b[0m, in \u001b[0;36mMLWritable.save\u001b[1;34m(self, path)\u001b[0m\n\u001b[0;32m    260\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msave\u001b[39m(\u001b[38;5;28mself\u001b[39m, path: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m    261\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"Save this ML instance to the given path, a shortcut of 'write().save(path)'.\"\"\"\u001b[39;00m\n\u001b[1;32m--> 262\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mD:\\AppData\\Local\\mambaforge\\envs\\python39ml\\lib\\site-packages\\pyspark\\ml\\util.py:156\u001b[0m, in \u001b[0;36mMLWriter.save\u001b[1;34m(self, path)\u001b[0m\n\u001b[0;32m    154\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mshouldOverwrite:\n\u001b[0;32m    155\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_handleOverwrite(path)\n\u001b[1;32m--> 156\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msaveImpl\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mD:\\AppData\\Local\\mambaforge\\envs\\python39ml\\lib\\site-packages\\pyspark\\ml\\pipeline.py:266\u001b[0m, in \u001b[0;36mPipelineModelWriter.saveImpl\u001b[1;34m(self, path)\u001b[0m\n\u001b[0;32m    264\u001b[0m stages \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minstance\u001b[38;5;241m.\u001b[39mstages\n\u001b[0;32m    265\u001b[0m PipelineSharedReadWrite\u001b[38;5;241m.\u001b[39mvalidateStages(cast(List[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPipelineStage\u001b[39m\u001b[38;5;124m\"\u001b[39m], stages))\n\u001b[1;32m--> 266\u001b[0m \u001b[43mPipelineSharedReadWrite\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msaveImpl\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    267\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minstance\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\u001b[43mList\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPipelineStage\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstages\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\n\u001b[0;32m    268\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mD:\\AppData\\Local\\mambaforge\\envs\\python39ml\\lib\\site-packages\\pyspark\\ml\\pipeline.py:413\u001b[0m, in \u001b[0;36mPipelineSharedReadWrite.saveImpl\u001b[1;34m(instance, stages, sc, path)\u001b[0m\n\u001b[0;32m    411\u001b[0m stageUids \u001b[38;5;241m=\u001b[39m [stage\u001b[38;5;241m.\u001b[39muid \u001b[38;5;28;01mfor\u001b[39;00m stage \u001b[38;5;129;01min\u001b[39;00m stages]\n\u001b[0;32m    412\u001b[0m jsonParams \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstageUids\u001b[39m\u001b[38;5;124m\"\u001b[39m: stageUids, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlanguage\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPython\u001b[39m\u001b[38;5;124m\"\u001b[39m}\n\u001b[1;32m--> 413\u001b[0m \u001b[43mDefaultParamsWriter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msaveMetadata\u001b[49m\u001b[43m(\u001b[49m\u001b[43minstance\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparamMap\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjsonParams\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    414\u001b[0m stagesDir \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(path, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstages\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m    415\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m index, stage \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(stages):\n",
      "File \u001b[1;32mD:\\AppData\\Local\\mambaforge\\envs\\python39ml\\lib\\site-packages\\pyspark\\ml\\util.py:466\u001b[0m, in \u001b[0;36mDefaultParamsWriter.saveMetadata\u001b[1;34m(instance, path, sc, extraMetadata, paramMap)\u001b[0m\n\u001b[0;32m    462\u001b[0m metadataPath \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(path, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmetadata\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m    463\u001b[0m metadataJson \u001b[38;5;241m=\u001b[39m DefaultParamsWriter\u001b[38;5;241m.\u001b[39m_get_metadata_to_save(\n\u001b[0;32m    464\u001b[0m     instance, sc, extraMetadata, paramMap\n\u001b[0;32m    465\u001b[0m )\n\u001b[1;32m--> 466\u001b[0m \u001b[43msc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparallelize\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmetadataJson\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msaveAsTextFile\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmetadataPath\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mD:\\AppData\\Local\\mambaforge\\envs\\python39ml\\lib\\site-packages\\pyspark\\rdd.py:3425\u001b[0m, in \u001b[0;36mRDD.saveAsTextFile\u001b[1;34m(self, path, compressionCodecClass)\u001b[0m\n\u001b[0;32m   3423\u001b[0m     keyed\u001b[38;5;241m.\u001b[39m_jrdd\u001b[38;5;241m.\u001b[39mmap(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mctx\u001b[38;5;241m.\u001b[39m_jvm\u001b[38;5;241m.\u001b[39mBytesToString())\u001b[38;5;241m.\u001b[39msaveAsTextFile(path, compressionCodec)\n\u001b[0;32m   3424\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 3425\u001b[0m     \u001b[43mkeyed\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_jrdd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mctx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_jvm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mBytesToString\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msaveAsTextFile\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mD:\\AppData\\Local\\mambaforge\\envs\\python39ml\\lib\\site-packages\\py4j\\java_gateway.py:1322\u001b[0m, in \u001b[0;36mJavaMember.__call__\u001b[1;34m(self, *args)\u001b[0m\n\u001b[0;32m   1316\u001b[0m command \u001b[38;5;241m=\u001b[39m proto\u001b[38;5;241m.\u001b[39mCALL_COMMAND_NAME \u001b[38;5;241m+\u001b[39m\\\n\u001b[0;32m   1317\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcommand_header \u001b[38;5;241m+\u001b[39m\\\n\u001b[0;32m   1318\u001b[0m     args_command \u001b[38;5;241m+\u001b[39m\\\n\u001b[0;32m   1319\u001b[0m     proto\u001b[38;5;241m.\u001b[39mEND_COMMAND_PART\n\u001b[0;32m   1321\u001b[0m answer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgateway_client\u001b[38;5;241m.\u001b[39msend_command(command)\n\u001b[1;32m-> 1322\u001b[0m return_value \u001b[38;5;241m=\u001b[39m \u001b[43mget_return_value\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m   1323\u001b[0m \u001b[43m    \u001b[49m\u001b[43manswer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgateway_client\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtarget_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   1325\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m temp_arg \u001b[38;5;129;01min\u001b[39;00m temp_args:\n\u001b[0;32m   1326\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(temp_arg, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_detach\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n",
      "File \u001b[1;32mD:\\AppData\\Local\\mambaforge\\envs\\python39ml\\lib\\site-packages\\pyspark\\errors\\exceptions\\captured.py:179\u001b[0m, in \u001b[0;36mcapture_sql_exception.<locals>.deco\u001b[1;34m(*a, **kw)\u001b[0m\n\u001b[0;32m    177\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdeco\u001b[39m(\u001b[38;5;241m*\u001b[39ma: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkw: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Any:\n\u001b[0;32m    178\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 179\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m f(\u001b[38;5;241m*\u001b[39ma, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkw)\n\u001b[0;32m    180\u001b[0m     \u001b[38;5;28;01mexcept\u001b[39;00m Py4JJavaError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m    181\u001b[0m         converted \u001b[38;5;241m=\u001b[39m convert_exception(e\u001b[38;5;241m.\u001b[39mjava_exception)\n",
      "File \u001b[1;32mD:\\AppData\\Local\\mambaforge\\envs\\python39ml\\lib\\site-packages\\py4j\\protocol.py:326\u001b[0m, in \u001b[0;36mget_return_value\u001b[1;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[0;32m    324\u001b[0m value \u001b[38;5;241m=\u001b[39m OUTPUT_CONVERTER[\u001b[38;5;28mtype\u001b[39m](answer[\u001b[38;5;241m2\u001b[39m:], gateway_client)\n\u001b[0;32m    325\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m answer[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m==\u001b[39m REFERENCE_TYPE:\n\u001b[1;32m--> 326\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m Py4JJavaError(\n\u001b[0;32m    327\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAn error occurred while calling \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39m\n\u001b[0;32m    328\u001b[0m         \u001b[38;5;28mformat\u001b[39m(target_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, name), value)\n\u001b[0;32m    329\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    330\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m Py4JError(\n\u001b[0;32m    331\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAn error occurred while calling \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m. Trace:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{3}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39m\n\u001b[0;32m    332\u001b[0m         \u001b[38;5;28mformat\u001b[39m(target_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, name, value))\n",
      "\u001b[1;31mPy4JJavaError\u001b[0m: An error occurred while calling o2738.saveAsTextFile.\n: java.lang.RuntimeException: java.io.FileNotFoundException: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset. -see https://wiki.apache.org/hadoop/WindowsProblems\r\n\tat org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:735)\r\n\tat org.apache.hadoop.util.Shell.getSetPermissionCommand(Shell.java:270)\r\n\tat org.apache.hadoop.util.Shell.getSetPermissionCommand(Shell.java:286)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:978)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.mkOneDirWithMode(RawLocalFileSystem.java:660)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:700)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:699)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:699)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:699)\r\n\tat org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)\r\n\tat org.apache.hadoop.fs.ChecksumFileSystem.mkdirs(ChecksumFileSystem.java:788)\r\n\tat org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.setupJob(FileOutputCommitter.java:356)\r\n\tat org.apache.hadoop.mapred.FileOutputCommitter.setupJob(FileOutputCommitter.java:131)\r\n\tat org.apache.hadoop.mapred.OutputCommitter.setupJob(OutputCommitter.java:265)\r\n\tat org.apache.spark.internal.io.HadoopMapReduceCommitProtocol.setupJob(HadoopMapReduceCommitProtocol.scala:188)\r\n\tat 
org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:79)\r\n\tat org.apache.spark.rdd.PairRDDFunctions.$anonfun$saveAsHadoopDataset$1(PairRDDFunctions.scala:1091)\r\n\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)\r\n\tat org.apache.spark.rdd.RDD.withScope(RDD.scala:410)\r\n\tat org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1089)\r\n\tat org.apache.spark.rdd.PairRDDFunctions.$anonfun$saveAsHadoopFile$4(PairRDDFunctions.scala:1062)\r\n\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)\r\n\tat org.apache.spark.rdd.RDD.withScope(RDD.scala:410)\r\n\tat org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:1027)\r\n\tat org.apache.spark.rdd.PairRDDFunctions.$anonfun$saveAsHadoopFile$3(PairRDDFunctions.scala:1009)\r\n\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)\r\n\tat org.apache.spark.rdd.RDD.withScope(RDD.scala:410)\r\n\tat org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:1008)\r\n\tat org.apache.spark.rdd.PairRDDFunctions.$anonfun$saveAsHadoopFile$2(PairRDDFunctions.scala:965)\r\n\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)\r\n\tat 
org.apache.spark.rdd.RDD.withScope(RDD.scala:410)\r\n\tat org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:963)\r\n\tat org.apache.spark.rdd.RDD.$anonfun$saveAsTextFile$2(RDD.scala:1623)\r\n\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)\r\n\tat org.apache.spark.rdd.RDD.withScope(RDD.scala:410)\r\n\tat org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1623)\r\n\tat org.apache.spark.rdd.RDD.$anonfun$saveAsTextFile$1(RDD.scala:1609)\r\n\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)\r\n\tat org.apache.spark.rdd.RDD.withScope(RDD.scala:410)\r\n\tat org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1609)\r\n\tat org.apache.spark.api.java.JavaRDDLike.saveAsTextFile(JavaRDDLike.scala:564)\r\n\tat org.apache.spark.api.java.JavaRDDLike.saveAsTextFile$(JavaRDDLike.scala:563)\r\n\tat org.apache.spark.api.java.AbstractJavaRDDLike.saveAsTextFile(JavaRDDLike.scala:45)\r\n\tat java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103)\r\n\tat java.base/java.lang.reflect.Method.invoke(Method.java:580)\r\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\r\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)\r\n\tat py4j.Gateway.invoke(Gateway.java:282)\r\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\r\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\r\n\tat py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)\r\n\tat py4j.ClientServerConnection.run(ClientServerConnection.java:106)\r\n\tat 
java.base/java.lang.Thread.run(Thread.java:1570)\r\nCaused by: java.io.FileNotFoundException: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset. -see https://wiki.apache.org/hadoop/WindowsProblems\r\n\tat org.apache.hadoop.util.Shell.fileNotFoundException(Shell.java:547)\r\n\tat org.apache.hadoop.util.Shell.getHadoopHomeDir(Shell.java:568)\r\n\tat org.apache.hadoop.util.Shell.getQualifiedBin(Shell.java:591)\r\n\tat org.apache.hadoop.util.Shell.<clinit>(Shell.java:688)\r\n\tat org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:79)\r\n\tat org.apache.hadoop.conf.Configuration.getTimeDurationHelper(Configuration.java:1907)\r\n\tat org.apache.hadoop.conf.Configuration.getTimeDuration(Configuration.java:1867)\r\n\tat org.apache.hadoop.conf.Configuration.getTimeDuration(Configuration.java:1840)\r\n\tat org.apache.hadoop.util.ShutdownHookManager.getShutdownTimeout(ShutdownHookManager.java:183)\r\n\tat org.apache.hadoop.util.ShutdownHookManager$HookEntry.<init>(ShutdownHookManager.java:207)\r\n\tat org.apache.hadoop.util.ShutdownHookManager.addShutdownHook(ShutdownHookManager.java:304)\r\n\tat org.apache.spark.util.SparkShutdownHookManager.install(ShutdownHookManager.scala:181)\r\n\tat org.apache.spark.util.ShutdownHookManager$.shutdownHooks$lzycompute(ShutdownHookManager.scala:50)\r\n\tat org.apache.spark.util.ShutdownHookManager$.shutdownHooks(ShutdownHookManager.scala:48)\r\n\tat org.apache.spark.util.ShutdownHookManager$.addShutdownHook(ShutdownHookManager.scala:153)\r\n\tat org.apache.spark.util.ShutdownHookManager$.<init>(ShutdownHookManager.scala:58)\r\n\tat org.apache.spark.util.ShutdownHookManager$.<clinit>(ShutdownHookManager.scala)\r\n\tat org.apache.spark.util.Utils$.createTempDir(Utils.scala:242)\r\n\tat org.apache.spark.util.SparkFileUtils.createTempDir(SparkFileUtils.scala:103)\r\n\tat org.apache.spark.util.SparkFileUtils.createTempDir$(SparkFileUtils.scala:102)\r\n\tat 
org.apache.spark.util.Utils$.createTempDir(Utils.scala:94)\r\n\tat org.apache.spark.deploy.SparkSubmit.prepareSubmitEnvironment(SparkSubmit.scala:372)\r\n\tat org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:964)\r\n\tat org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:194)\r\n\tat org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:217)\r\n\tat org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:91)\r\n\tat org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1120)\r\n\tat org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1129)\r\n\tat org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)\r\nCaused by: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset.\r\n\tat org.apache.hadoop.util.Shell.checkHadoopHomeInner(Shell.java:467)\r\n\tat org.apache.hadoop.util.Shell.checkHadoopHome(Shell.java:438)\r\n\tat org.apache.hadoop.util.Shell.<clinit>(Shell.java:515)\r\n\t... 25 more\r\n"
     ]
    }
   ],
   "source": [
    "# Persist the fitted pipeline to disk so it can be reloaded later (e.g. with PipelineModel.load).\n",
    "# NOTE(review): the stored output above shows this cell raised Py4JJavaError on this Windows\n",
    "# machine because HADOOP_HOME / hadoop.home.dir are unset — Spark's local-filesystem writes go\n",
    "# through Hadoop's RawLocalFileSystem, which needs winutils.exe on Windows.\n",
    "# Fix belongs in the environment/setup cell: install winutils and set HADOOP_HOME before\n",
    "# creating the SparkSession (see https://wiki.apache.org/hadoop/WindowsProblems).\n",
    "food_pipeline_model.save(\"food_pipeline.model\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
